{
 "cells": [
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-03T13:05:46.255822Z",
     "start_time": "2025-03-03T13:05:45.083154Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import os\n",
    "\n",
    "from sklearn.datasets import fetch_california_housing\n",
    "from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge, LogisticRegression, Lasso\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.metrics import mean_squared_error, classification_report, roc_auc_score\n",
    "import joblib\n",
    "import pandas as pd\n",
    "import numpy as np"
   ],
   "id": "a4781dc0b1b833fb",
   "outputs": [],
   "execution_count": 1
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-03T13:05:46.260873Z",
     "start_time": "2025-03-03T13:05:46.257229Z"
    }
   },
   "cell_type": "code",
   "source": "sgd = SGDRegressor(eta0=0.01,penalty='l2', max_iter=1000)",
   "id": "5f3291b0abc9ac2",
   "outputs": [],
   "execution_count": 2
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-03T13:05:56.346395Z",
     "start_time": "2025-03-03T13:05:56.334895Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 获取数据\n",
    "fe_cal = fetch_california_housing(data_home='data')\n",
    "\n",
    "# 分割数据集到训练集和测试集\n",
    "x_train, x_test, y_train, y_test = train_test_split(fe_cal.data, fe_cal.target, test_size=0.25, random_state=1)"
   ],
   "id": "914206298bf85f62",
   "outputs": [],
   "execution_count": 4
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-03T13:07:02.482768Z",
     "start_time": "2025-03-03T13:07:02.477478Z"
    }
   },
   "cell_type": "code",
   "source": [
    "std_x = StandardScaler()\n",
    "x_train = std_x.fit_transform(x_train) \n",
    "x_test = std_x.transform(x_test) #测试集标准化\n",
    "std_y = StandardScaler()"
   ],
   "id": "341b99191f562e67",
   "outputs": [],
   "execution_count": 6
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-03T13:07:03.507081Z",
     "start_time": "2025-03-03T13:07:03.500572Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# # # 岭回归去进行房价预测\n",
    "#岭回归是对线性回归加入L2正则化，L2正则化是对系数的平方和进行惩罚\n",
    "#alpha就是补偿的系数\n",
    "#正规方程求解，加补偿就可以让正规方程可逆\n",
    "rd = Ridge(alpha=0.02)\n",
    "\n",
    "rd.fit(x_train, y_train)\n",
    "\n",
    "print(rd.coef_)\n",
    "#\n",
    "# # 预测测试集的房子价格\n",
    "print(rd.predict(x_test).shape)\n",
    "# y_rd_predict = std_y.inverse_transform(rd.predict(x_test))\n",
    "y_predict = rd.predict(x_test)\n",
    "# print(\"岭回归里面每个房子的预测价格：\", y_rd_predict)\n",
    "\n",
    "print(\"岭回归的均方误差：\", mean_squared_error(y_test, y_predict))\n",
    "# print(\"岭回归的均方误差：\", mean_squared_error(std_y.inverse_transform(y_test), y_rd_predict))"
   ],
   "id": "dfccaf2025a62371",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0.83166963  0.12159681 -0.26758236  0.30983534 -0.00517992 -0.04040432\n",
      " -0.90735215 -0.88211025]\n",
      "(5160,)\n",
      "岭回归的均方误差： 0.5356531179270397\n"
     ]
    }
   ],
   "execution_count": 7
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "#岭回归的均方误差： 0.5356531179270397 ",
   "id": "e83232f564b79841"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
